# Purpose: Produce the levels-matching movers plots (e.g., Figure 6)

source("constants.R")
source("movers_helpers.R") # a set of helper functions for this script

#############################################
# 1. Read in the datasets of interest #
#############################################

# See the README for instructions on how to access these data

# Create data for Syrian movers making Native friends
sy_to_native_dat <- read_csv("input/moves_levels_sy_to_native.csv")

# Create data for Native movers making Syrian friends
native_to_sy_dat <- read_csv("input/moves_levels_native_to_sy.csv")


################################################################
# 1A For revisions, make some results on distribution of moves #
################################################################

### Get the regional integration measures
regional_measures <- read_csv("output/regional_measures.csv") %>%
  filter(sample == "all", measure == "n_frnd_nat_lcl") %>%
  mutate(rel_frnd = sy_avg_resid/native_avg_resid) %>%
  select(nuts3, sy_avg_resid, sy_n, native_avg_resid, native_n, rel_frnd)

ntiles_dat <- regional_measures %>%
    mutate(
      nuts3_ntile_sy = ntiles.wtd(sy_avg_resid, 5, sy_n),
      nuts3_ntile_gf = ntiles.wtd(native_avg_resid, 5, native_n),
      nuts3_ntile_rf = ntiles.wtd(rel_frnd, 5, native_n)) %>%
    select(nuts3, nuts3_ntile_sy, sy_avg_resid,
            nuts3_ntile_gf, native_avg_resid,
            nuts3_ntile_rf, rel_frnd)

##### SYRIAN MIGRANT MOVES
sy_move_matrix_dat <- sy_to_native_dat %>%
    select(rid, unique_move_num, first_quarter_period2, loc1, loc2) %>%
    left_join(ntiles_dat, by=c("loc1"="nuts3")) %>%
    left_join(ntiles_dat, by=c("loc2"="nuts3"), suffix=c("1", "2")) %>%
    mutate(
      sy_diff = sy_avg_resid2 - sy_avg_resid1,
      native_diff = native_avg_resid2 - native_avg_resid1,
      rel_frnd_diff = rel_frnd2 - rel_frnd1)

# SY Friending
sy_move_matrix_dat %>%
    group_by(nuts3_ntile_sy1, nuts3_ntile_sy2) %>%
    summarise(n = n()) %>%
    pivot_wider(names_from = nuts3_ntile_sy2, values_from = n) %>%
    write_csv("output/sy_move_distribution_matrix.csv")

mean_friend_diff <- mean(sy_move_matrix_dat$sy_diff)
median_friend_diff <- median(sy_move_matrix_dat$sy_diff)
ggplot(sy_move_matrix_dat, aes(x=sy_diff)) +
    geom_histogram(aes(y=0.75*..density..), binwidth=0.75, color="white") +
    geom_vline(xintercept = median_friend_diff, color=c25[2], linetype = "dashed", size=1.2) +
    geom_vline(xintercept = mean_friend_diff, color=c25[1], linetype = "dotdash", size=1.2) +
    theme_classic() +
    labs(
      title = str_interp("Median = ${round(median_friend_diff,2)}\nMean = ${round(mean_friend_diff,2)}"),
      x="Dest-Origin SY Friending Integration", y="Share")

ggsave("output/sy_move_distribution.png", last_plot(), width=5, height=4)

# Relative Friendliness
sy_move_matrix_dat %>%
  group_by(nuts3_ntile_rf1, nuts3_ntile_rf2) %>%
  summarise(n = n()) %>%
  pivot_wider(names_from = nuts3_ntile_rf2, values_from = n) %>%
  write_csv("output/sy_move_distribution_matrix_rf.csv")

median_friend_diff <- median(sy_move_matrix_dat$rel_frnd_diff)
mean_friend_diff <- mean(sy_move_matrix_dat$rel_frnd_diff)
ggplot(sy_move_matrix_dat, aes(x=rel_frnd_diff)) +
    geom_histogram(aes(y=0.0075*..density..), color="white", binwidth=0.0075) +
    geom_vline(xintercept = median_friend_diff, color=c25[2], linetype = "dashed", size=1.2) +
    geom_vline(xintercept = mean_friend_diff, color=c25[1], linetype = "dotdash", size=1.2) +
    theme_classic() +
    labs(
      title = str_interp("Median = ${round(median_friend_diff,4)}\nMean = ${round(mean_friend_diff,4)}"),
      x="Dest-Origin Relative Friending", y="Share")

ggsave("output/sy_move_distribution_rf.png", last_plot(), width=5, height=4)


##### GERMAN NATIVE MOVES
native_move_matrix_dat <- native_to_sy_dat %>%
    select(rid, unique_move_num, first_quarter_period2, loc1, loc2) %>%
    left_join(ntiles_dat, by=c("loc1"="nuts3")) %>%
    left_join(ntiles_dat, by=c("loc2"="nuts3"), suffix=c("1", "2")) %>%
    mutate(
      sy_diff = sy_avg_resid2 - sy_avg_resid1,
      native_diff = native_avg_resid2 - native_avg_resid1,
      rel_frnd_diff = rel_frnd2 - rel_frnd1)

# SY Friending
native_move_matrix_dat %>%
    group_by(nuts3_ntile_sy1, nuts3_ntile_sy2) %>%
    summarise(n = n()) %>%
    pivot_wider(names_from = nuts3_ntile_sy2, values_from = n) %>%
    write_csv("output/native_move_distribution_matrix.csv")

mean_friend_diff <- mean(native_move_matrix_dat$sy_diff)
median_friend_diff <- median(native_move_matrix_dat$sy_diff)
ggplot(native_move_matrix_dat, aes(x=sy_diff)) +
    geom_histogram(aes(y=0.75*..density..), binwidth=0.75, color="white") +
    geom_vline(xintercept = median_friend_diff, color=c25[2], linetype = "dashed", size=1.2) +
    geom_vline(xintercept = mean_friend_diff, color=c25[1], linetype = "dotdash", size=1.2) +
    theme_classic() +
    labs(
      title = str_interp("Median = ${round(median_friend_diff,2)}\nMean = ${round(mean_friend_diff,2)}"),
      x="Dest-Origin SY Friending Integration", y="Share")

ggsave("output/native_move_distribution.png", last_plot(), width=5, height=4)

# Relative Friendliness
native_move_matrix_dat %>%
  group_by(nuts3_ntile_rf1, nuts3_ntile_rf2) %>%
  summarise(n = n()) %>%
  pivot_wider(names_from = nuts3_ntile_rf2, values_from = n) %>%
  write_csv("output/native_move_distribution_matrix_rf.csv")

mean_friend_diff <- mean(native_move_matrix_dat$rel_frnd_diff)
median_friend_diff <- median(native_move_matrix_dat$rel_frnd_diff)
ggplot(native_move_matrix_dat, aes(x=rel_frnd_diff)) +
    geom_histogram(aes(y=0.0075*..density..), binwidth=0.0075, color="white") +
    geom_vline(xintercept = median_friend_diff, color=c25[2], linetype = "dashed", size=1.2) +
    geom_vline(xintercept = mean_friend_diff, color=c25[1], linetype = "dotdash", size=1.2) +
    theme_classic() +
    labs(
      title = str_interp("Median = ${round(median_friend_diff,4)}\nMean = ${round(mean_friend_diff,4)}"),
      x="Dest-Origin Relative Friending", y="Share")

ggsave("output/native_move_distribution_rf.png", last_plot(), width=5, height=4)


######################################################
## 2. Add controls and residualize on native usage ##
#####################################################

### Make control 0 = SY pop / Native Pop (for relative friending)
# See the README for instructions on how to access this data
sy_native_pop_ratio <- read_csv("input/moves_levels_sy_native_pop_ratio.csv")

# Make the 1 year looking forward average of pop
sy_native_pop_ratio <- sy_native_pop_ratio %>%
  arrange(nuts3, quarter) %>%
  group_by(nuts3) %>%
  mutate(sy_native_ratio_yr =
           (sy_native_ratio + lead(sy_native_ratio, 1) + lead(sy_native_ratio, 2) + lead(sy_native_ratio, 3))/4) %>%
  ungroup


### Make control 1 = Residualization on intensive margin ###
# See the README for instructions on how to access this data
residualize_intensive <- read_csv("input/movers_plots_ntiles_resid_intesnive.csv")

### Make control 2 = Residualization on extensive margin ###
# See the README for instructions on how to access these data
native_pop_fb <- read_csv("input/movers_plots_ntiles_resid_extensive.csv")

# See the README for instructions on how to access these data
kreis_xw <- read_csv("input/nuts3_DE_xw.csv")
kreis_demos <- read_csv("input/kreis_total_pop_by_age_gender.csv")

kreis_to_exclude <- unique(filter(kreis_demos, year == 2019, is.na(frac_syr_tot))$ags)

residualize_extensive <- kreis_demos %>%
  filter(year == 2019) %>%
  left_join(kreis_xw, by=c("ags"="ID")) %>%
  mutate(pop_native = pop_tot * (1-frac_for_tot)) %>%
  select(nuts3=NUTS3, ags, pop_tot, pop_native) %>%
  left_join(native_pop_fb, by=c("nuts3"="curr_nuts3")) %>%
  mutate(share_natives_on_fb = n_native_fb/pop_native)

# Impute the bad data with overall mean
impute_val <- mean(filter(residualize_extensive, !ags %in% kreis_to_exclude)$share_natives_on_fb)
residualize_extensive <- residualize_extensive %>%
  mutate(share_natives_on_fb = if_else(ags %in% kreis_to_exclude, impute_val, share_natives_on_fb)) %>%
  select(nuts3, share_natives_on_fb)


#### Add controls to the datasets #####
sy_to_native_dat <-
  add_controls_to_final_levels_dat(sy_to_native_dat, sy_native_pop_ratio, residualize_intensive, residualize_extensive)

native_to_sy_dat <-
  add_controls_to_final_levels_dat(native_to_sy_dat, sy_native_pop_ratio, residualize_intensive, residualize_extensive)


#### Residualize on usage controls ####

## SY to Native Friending
pre_lhs_to_use_sy <- c("pre_makes_nat_lcl_in_quarter", "pre_avg_makes_nat_lcl_in_quarter.split1", "pre_avg_makes_nat_lcl_in_quarter.split2", "pre_avg_makes_nat_lcl_in_quarter")
post_lhs_to_use_sy <- c("post_makes_nat_lcl_in_quarter", "post_avg_makes_nat_lcl_in_quarter.split1", "post_avg_makes_nat_lcl_in_quarter.split2", "post_avg_makes_nat_lcl_in_quarter")

sy_to_native_dat <- residualize_on_native_usage(sy_to_native_dat, pre_lhs_to_use_sy, post_lhs_to_use_sy) %>%
  mutate(
    change_in_makes_nat_lcl_in_quarter_resid = post_makes_nat_lcl_in_quarter_resid - pre_makes_nat_lcl_in_quarter_resid,
    change_in_avg_makes_nat_lcl_in_quarter.split1_resid = post_avg_makes_nat_lcl_in_quarter.split1_resid - pre_avg_makes_nat_lcl_in_quarter.split1_resid,
    change_in_avg_makes_nat_lcl_in_quarter.split2_resid = post_avg_makes_nat_lcl_in_quarter.split2_resid - pre_avg_makes_nat_lcl_in_quarter.split2_resid)

## Native General Friendliness
pre_lhs_to_use_nat_gf <- c("pre_total_nat_lcl", "pre_avg_total_nat_lcl", "pre_avg_total_nat_lcl.split1", "pre_avg_total_nat_lcl.split2")
post_lhs_to_use_nat_gf <- c("post_total_nat_lcl", "post_avg_total_nat_lcl", "post_avg_total_nat_lcl.split1", "post_avg_total_nat_lcl.split2")

native_to_sy_dat <- residualize_on_native_usage(native_to_sy_dat, pre_lhs_to_use_nat_gf, post_lhs_to_use_nat_gf) %>%
  mutate(
    change_in_total_nat_lcl_resid = post_total_nat_lcl_resid - pre_total_nat_lcl_resid,
    change_in_avg_total_nat_lcl.split1_resid = post_avg_total_nat_lcl.split1_resid - pre_avg_total_nat_lcl.split1_resid,
    change_in_avg_total_nat_lcl.split2_resid = post_avg_total_nat_lcl.split2_resid - pre_avg_total_nat_lcl.split2_resid)

## Native-based relative friending
native_to_sy_dat <- native_to_sy_dat %>%
  mutate(
    # The ratio of ratios
    pre_rf_ratio = pre_relative_frnd / sy_native_ratio_yr1,
    post_rf_ratio = post_relative_frnd / sy_native_ratio_yr2,
    pre_avg_rf_ratio = pre_avg_relative_frnd / sy_native_ratio_yr1,
    post_avg_rf_ratio = post_avg_relative_frnd / sy_native_ratio_yr2,
    pre_avg_rf_ratio.split1 = pre_avg_relative_frnd.split1 / sy_native_ratio_yr1,
    post_avg_rf_ratio.split1 = post_avg_relative_frnd.split1 / sy_native_ratio_yr2,
    pre_avg_rf_ratio.split2 = pre_avg_relative_frnd.split2 / sy_native_ratio_yr1,
    post_avg_rf_ratio.split2 = post_avg_relative_frnd.split2 / sy_native_ratio_yr2,

    # The unresid outcomes
    change_in_rf_ratio = post_rf_ratio - pre_rf_ratio,
    change_in_avg_rf_ratio.split1 = post_avg_rf_ratio.split1 - pre_avg_rf_ratio.split1,
    change_in_avg_rf_ratio.split2 = post_avg_rf_ratio.split2 - pre_avg_rf_ratio.split2)

pre_lhs_to_use_nat_rf <- c("pre_rf_ratio", "pre_avg_rf_ratio", "pre_avg_rf_ratio.split1", "pre_avg_rf_ratio.split2")
post_lhs_to_use_nat_rf <- c("post_rf_ratio", "post_avg_rf_ratio", "post_avg_rf_ratio.split1", "post_avg_rf_ratio.split2")

native_to_sy_dat.rf <- native_to_sy_dat %>%
  filter(is.finite(pre_rf_ratio) & is.finite(pre_avg_rf_ratio.split1) & is.finite(pre_avg_rf_ratio.split2) &
           is.finite(post_rf_ratio) & is.finite(post_avg_rf_ratio.split1) & is.finite(post_avg_rf_ratio.split2)) %>%
  residualize_on_native_usage(pre_lhs_to_use_nat_rf, post_lhs_to_use_nat_rf) %>%
  mutate(
    change_in_rf_ratio_resid = post_rf_ratio_resid - pre_rf_ratio_resid,
    change_in_avg_rf_ratio.split1_resid = post_avg_rf_ratio.split1_resid - pre_avg_rf_ratio.split1_resid,
    change_in_avg_rf_ratio.split2_resid = post_avg_rf_ratio.split2_resid - pre_avg_rf_ratio.split2_resid
  )


#############################
## 3. Make summary tables ##
############################

##### Read in the ntiles dat to split by median #####
ntiles_dat <- read_csv("output/regional_measures.csv") %>%
  filter(sample == "all", measure == "n_frnd_nat_lcl") %>%
  select(nuts3, sy_avg_resid, sy_n) %>%
  mutate(nuts3_ntile = ntiles.wtd(sy_avg_resid, 2, sy_n)) %>%
  select(nuts3, nuts3_ntile)

### Make summary tables
make_movers_summary_table(sy_to_native_dat, ntiles_dat, "pre_makes_nat_lcl_in_quarter_resid", "pre_avg_makes_nat_lcl_in_quarter_resid") %>%
  write_csv("output/sy_movers_summary_table.csv")

make_movers_summary_table(native_to_sy_dat, ntiles_dat, "pre_total_nat_lcl_resid", "pre_avg_total_nat_lcl_resid") %>%
  write_csv("output/native_GF_movers_summary_table.csv")

make_movers_summary_table(native_to_sy_dat.rf, ntiles_dat, "pre_rf_ratio_resid", "pre_avg_rf_ratio_resid") %>%
  write_csv("output/native_RF_movers_summary_table.csv")

##########################################################
## 4. Final results for SY Movers Making Native Friends ##
##########################################################

######## 4A. Binscatters ###########

## Shrunk quarter controls -- matching by region, time, gender, and age
binscatter(sy_to_native_dat,
           c("change_in_avg_makes_nat_lcl_in_quarter.split1", "change_in_avg_makes_nat_lcl_in_quarter.split2"),
           "change_in_makes_nat_lcl_in_quarter",
           fes=c("first_quarter_period1")) +
  lims(x=c(-17,17), y=c(-17,17)) +
  labs(x = "Dest-Origin Quarterly Prob of SY Making Native/Local Frnd (%)", y = str_interp("\u0394 Quarterly Prob of Mover Making Native/Local Frnd"))

ggsave("output/sy_movers_levels.png", last_plot(), width=5, height=5)

## Shrunk quarter controls -- matching by region, time, gender, and age; residualizing on usage
binscatter(sy_to_native_dat,
           c("change_in_avg_makes_nat_lcl_in_quarter.split1_resid", "change_in_avg_makes_nat_lcl_in_quarter.split2_resid"),
           "change_in_makes_nat_lcl_in_quarter_resid",
           fes=c("first_quarter_period1")) +
  lims(x=c(-17,17), y=c(-17,17)) +
  labs(x = "Dest-Origin Quarterly Prob of SY Making Native/Local Frnd (%)", y = str_interp("\u0394 Quarterly Prob of Mover Making Native/Local Frnd"))

ggsave("output/sy_movers_levels_RESID.png", last_plot(), width=5, height=5)


####### 4B. Table corresponding to binscatter #########

# Filter out singletons so we get the correct 'N'
col1_2_dat <- sy_to_native_dat %>%
  group_by(first_quarter_period1) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup()

ymean1_2 <- round(mean(col1_2_dat$change_in_makes_nat_lcl_in_quarter_resid),3)

col3_dat <- col1_2_dat %>%
  group_by(loc1) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup()

ymean3 <- round(mean(col3_dat$change_in_makes_nat_lcl_in_quarter_resid),3)

col4_dat <- col1_2_dat %>%
  group_by(loc2) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup()

ymean4 <- round(mean(col4_dat$change_in_makes_nat_lcl_in_quarter_resid),3)

col1 <- felm(change_in_makes_nat_lcl_in_quarter_resid ~ 1 | first_quarter_period1 |
               (change_in_avg_makes_nat_lcl_in_quarter.split2_resid~change_in_avg_makes_nat_lcl_in_quarter.split1_resid) | 0,
             data = col1_2_dat)

col2 <- felm(change_in_makes_nat_lcl_in_quarter_resid ~ 1 | first_quarter_period1 |
               (pre_avg_makes_nat_lcl_in_quarter.split2_resid+post_avg_makes_nat_lcl_in_quarter.split2_resid~pre_avg_makes_nat_lcl_in_quarter.split1_resid+post_avg_makes_nat_lcl_in_quarter.split1_resid) | 0,
             data = col1_2_dat)

col3 <- felm(change_in_makes_nat_lcl_in_quarter_resid ~ 1 | first_quarter_period1 + loc1 |
               (change_in_avg_makes_nat_lcl_in_quarter.split2_resid~change_in_avg_makes_nat_lcl_in_quarter.split1_resid) | 0,
             data = col3_dat)

col4 <- felm(change_in_makes_nat_lcl_in_quarter_resid ~ 1 | first_quarter_period1 + loc2 |
               (change_in_avg_makes_nat_lcl_in_quarter.split2_resid~change_in_avg_makes_nat_lcl_in_quarter.split1_resid) | 0,
             data = col4_dat)

stargazer(col1, col2, col3, col4,
          keep.stat = c("n","rsq"),
          add.lines=list(
            c("Quarter FEs", "X", "X", "X", "X"),
            c("Pre NUTS3 FEs", "", "", "X", ""),
            c("Post NUTS3 FEs", "", "", "", "X"),
            c("ymean", ymean1_2, ymean1_2, ymean3, ymean4)),
          type="html",
          out="output/movers_levels_regression.doc")

#### 4C. Heterogeneity Graphs ####

to_plot <- make_heterogeneity_graphs(
  sy_to_native_dat,
  "change_in_makes_nat_lcl_in_quarter_resid ~ 1 | as.factor(first_quarter_period1) | (change_in_avg_makes_nat_lcl_in_quarter.split2_resid~change_in_avg_makes_nat_lcl_in_quarter.split1_resid) | 0")

max_val <- max(to_plot$coeffs + (to_plot$ses * 1.96))

to_plot %>%
  mutate(groups = factor(groups, levels=c("Age 40+", "Age 30-39", "Age <30", "Male", "Female", "All"))) %>%
  ggplot(aes(x=coeffs, y=groups, col=group_group)) +
  geom_point() +
  geom_errorbar(aes(xmin=coeffs-(ses*1.96), xmax=coeffs+(ses*1.96)), width=.3, size=.3,
                position=position_dodge(0.05)) +
  theme_bw() +
  scale_color_manual(values=c(c25[1], "black", c25[2])) +
  theme(legend.position = "none") +
  geom_vline(xintercept = 1, linetype = "longdash", col="gray") +
  labs(x = "Slope Coefficient", y="") +
  lims(x=c(0,max(c(max_val, 1.06))))

write_csv(to_plot, "output/sy_movers_levels_RESID_HETERO.csv")
ggsave("output/sy_movers_levels_RESID_HETERO.png", last_plot(), width=6, height=3)


##############################
## 5. General friendliness ##
#############################

#### 5A. Binscatters ####

# This produces Figure 6, panel (A)
binscatter(native_to_sy_dat,
           c("change_in_avg_total_nat_lcl.split1_resid", "change_in_avg_total_nat_lcl.split2_resid"),
           "change_in_total_nat_lcl_resid",
           fes=c("first_quarter_period1")) +
  lims(x=c(-8,10), y=c(-8,10)) +
  labs(x = "Dest-Origin Yearly General Friendliness", y = str_interp("\u0394 Mover Yearly General Friendliness"))

ggsave("output/native_movers_gf_levels_RESID.png", last_plot(), width=5, height=5)


##### 5B. Heterogeneity graphs #######

to_plot <- make_heterogeneity_graphs(
  native_to_sy_dat,
  "change_in_total_nat_lcl_resid ~ 1 | as.factor(first_quarter_period1) | (change_in_avg_total_nat_lcl.split2_resid~change_in_avg_total_nat_lcl.split1_resid) | 0")

max_val <- max(to_plot$coeffs + (to_plot$ses * 1.96))

to_plot %>%
  mutate(groups = factor(groups, levels=c("Age 40+", "Age 30-39", "Age <30", "Male", "Female", "All"))) %>%
  ggplot(aes(x=coeffs, y=groups, col=group_group)) +
  geom_point() +
  geom_errorbar(aes(xmin=coeffs-(ses*1.96), xmax=coeffs+(ses*1.96)), width=.3, size=.3,
                position=position_dodge(0.05)) +
  theme_bw() +
  scale_color_manual(values=c(c25[1], "black", c25[2])) +
  theme(legend.position = "none") +
  geom_vline(xintercept = 1, linetype = "longdash", col="gray") +
  labs(x = "Slope Coefficient", y="") +
  lims(x=c(0,max(c(max_val, 1.06))))

write_csv(to_plot, "output/native_movers_gf_levels_RESID_HETERO.csv")
ggsave("output/native_movers_gf_levels_RESID_HETERO.png", last_plot(), width=4, height=3)


##############################
## 6. Relative Friending ##
#############################

##### 6A. Binscatters ######

# This produces Figure 6, panel (B)
binscatter(native_to_sy_dat.rf,
           c("change_in_avg_rf_ratio.split1_resid", "change_in_avg_rf_ratio.split2_resid"),
           "change_in_rf_ratio_resid",
           fes=c("first_quarter_period1")) +
  labs(x = "Dest-Origin Yearly Relative Friending", y = str_interp("\u0394 Mover Yearly Relative Friending")) +
  lims(x=c(-0.3,0.25),y=c(-0.3,0.25))

ggsave("output/native_movers_rf_levels_RESID.png", last_plot(), width=5, height=5)


####### 6B. Heterogeneity graphs ######

to_plot <- make_heterogeneity_graphs(
  native_to_sy_dat.rf,
  "change_in_rf_ratio_resid ~ 1 | as.factor(first_quarter_period1) | (change_in_avg_rf_ratio.split2_resid~change_in_avg_rf_ratio.split1_resid) | 0")

max_val <- max(to_plot$coeffs + (to_plot$ses * 1.96))

to_plot %>%
  mutate(groups = factor(groups, levels=c("Age 40+", "Age 30-39", "Age <30", "Male", "Female", "All"))) %>%
  ggplot(aes(x=coeffs, y=groups, col=group_group)) +
  geom_point() +
  geom_errorbar(aes(xmin=coeffs-(ses*1.96), xmax=coeffs+(ses*1.96)), width=.3, size=.3,
                position=position_dodge(0.05)) +
  theme_bw() +
  scale_color_manual(values=c(c25[1], "black", c25[2])) +
  theme(legend.position = "none") +
  geom_vline(xintercept = 1, linetype = "longdash", col="gray") +
  labs(x = "Slope Coefficient", y="") +
  lims(x=c(0,max(c(max_val, 1.06))))

write_csv(to_plot, "output/native_movers_rf_levels_RESID_HETERO.csv")
ggsave("output/native_movers_rf_levels_RESID_HETERO.png", last_plot(), width=4, height=3)


#############################################
## 7. Appendix GF and RF regressions table ##
#############################################

#### Robustness table for the appendix #####

# Filter out singletons so we get the correct 'N'
col1_2_dat <- native_to_sy_dat %>%
  group_by(first_quarter_period1) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup()

ymean1_2 <- round(mean(col1_2_dat$change_in_total_nat_lcl_resid),3)

col3_dat <- col1_2_dat %>%
  group_by(loc1) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup()

ymean3 <- round(mean(col3_dat$change_in_total_nat_lcl_resid),3)

col4_dat <- col1_2_dat %>%
  group_by(loc2) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup()

ymean4 <- round(mean(col4_dat$change_in_total_nat_lcl_resid),3)

col5_6_dat <- native_to_sy_dat.rf %>%
  group_by(first_quarter_period1) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup()

ymean5_6 <- round(mean(col5_6_dat$change_in_rf_ratio),3)

col7_dat <- col5_6_dat %>%
  group_by(loc1) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup()

ymean7 <- round(mean(col7_dat$change_in_rf_ratio),3)

col8_dat <- col5_6_dat %>%
  group_by(loc2) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup()

ymean8 <- round(mean(col8_dat$change_in_rf_ratio),3)


col1 <- felm(change_in_total_nat_lcl_resid ~ 1 | as.factor(first_quarter_period1) |
               (change_in_avg_total_nat_lcl.split2_resid~change_in_avg_total_nat_lcl.split1_resid) | 0,
             data = col1_2_dat)

col2 <- felm(change_in_total_nat_lcl_resid ~ 1 | first_quarter_period1 |
               (pre_avg_total_nat_lcl.split2_resid+post_avg_total_nat_lcl.split2_resid~pre_avg_total_nat_lcl.split1_resid+post_avg_total_nat_lcl.split1_resid) | 0,
             data = col1_2_dat)

col3 <- felm(change_in_total_nat_lcl_resid ~ 1 | as.factor(first_quarter_period1) + loc1 |
               (change_in_avg_total_nat_lcl.split2_resid~change_in_avg_total_nat_lcl.split1_resid) | 0,
             data = col3_dat)

col4 <- felm(change_in_total_nat_lcl_resid ~ 1 | as.factor(first_quarter_period1) + loc2 |
               (change_in_avg_total_nat_lcl.split2_resid~change_in_avg_total_nat_lcl.split1_resid) | 0,
             data = col4_dat)

col5 <- felm(change_in_rf_ratio_resid ~ 1 | as.factor(first_quarter_period1) |
               (change_in_avg_rf_ratio.split2_resid~change_in_avg_rf_ratio.split1_resid) | 0,
             data = col5_6_dat)

col6 <- felm(change_in_rf_ratio_resid ~ 1 | as.factor(first_quarter_period1)|
               (pre_avg_rf_ratio.split2_resid+post_avg_rf_ratio.split2_resid~pre_avg_rf_ratio.split1_resid+post_avg_rf_ratio.split1_resid) | 0,
             data = col5_6_dat)

col7 <- felm(change_in_rf_ratio_resid ~ 1 | as.factor(first_quarter_period1) + loc1 |
               (change_in_avg_rf_ratio.split2_resid~change_in_avg_rf_ratio.split1_resid) | 0,
             data = col7_dat)

col8 <- felm(change_in_rf_ratio_resid ~ 1 | as.factor(first_quarter_period1) + loc2 |
               (change_in_avg_rf_ratio.split2_resid~change_in_avg_rf_ratio.split1_resid) | 0,
             data = col8_dat)

stargazer(col1, col2, col3, col4,
          col5, col6, col7, col8,
          keep.stat = c("n","rsq"),
          add.lines=list(
            c("Quarter FEs", "X", "X", "X", "X", "X", "X", "X", "X"),
            c("Pre NUTS3 FEs", "", "", "X", "", "", "", "X", ""),
            c("Post NUTS3 FEs", "", "", "", "X", "", "", "", "X"),
            c("ymean", ymean1_2, ymean1_2, ymean3, ymean4, ymean5_6, ymean5_6, ymean7, ymean8)),
          type="html",
          out="output/movers_levels_regression_NATIVE.doc")
